Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/provider.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import Javascript_bun from './providers/javascript_bun.js';
import Javascript_npm from './providers/javascript_npm.js';
import Javascript_pnpm from './providers/javascript_pnpm.js';
import Javascript_yarn from './providers/javascript_yarn.js';
import dockerfileProvider from './providers/oci_dockerfile.js'
import pythonPipProvider from './providers/python_pip.js'
import Python_pip_pyproject from './providers/python_pip_pyproject.js'
import Python_poetry from './providers/python_poetry.js'
Expand All @@ -34,7 +35,8 @@ export const availableProviders = [
new Python_poetry(),
new Python_uv(),
new Python_pip_pyproject(),
rustCargoProvider]
rustCargoProvider,
dockerfileProvider]

/**
* Match a provider by manifest type only (no lock file check). Used for license reading.
Expand Down
107 changes: 107 additions & 0 deletions src/providers/oci_dockerfile.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import fs from 'node:fs'

import { generateImageSBOM, parseImageRef } from '../oci_image/utils.js'

// Provider object for Dockerfile/Containerfile manifests; src/provider.js imports this
// default export and lists it in availableProviders.
export default {
	isSupported,
	validateLockFile,
	provideComponent,
	provideStack,
	readLicenseFromManifest,
	packageManagerName() {
		return 'oci'
	},
}

/** @typedef {import('../provider').Provider} Provider */

/** @typedef {import('../provider').Provided} Provided */

/**
* @type {string} ecosystem identifier for OCI image packages; getImageSBOM returns it
* as the `ecosystem` field of the provided payload
* @private
*/
const ecosystem = 'oci'

/**
* Check if the given manifest name is a Dockerfile or Containerfile.
* The comparison is an exact, case-sensitive string match against the two names, so
* lowercase names (e.g. 'dockerfile') and suffixed names (e.g. 'Dockerfile.dev') are
* reported as unsupported.
* @param {string} manifestName the manifest file name to check
* @returns {boolean} true if the manifest is a Dockerfile or Containerfile
*/
function isSupported(manifestName) {
return manifestName === 'Dockerfile' || manifestName === 'Containerfile'

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've seen a lot of cases where people will have multiple Dockerfiles but with different suffixes (see https://sourcegraph.com/search?q=context:global+f:/Dockerfile%5C..*&patternType=keyword&case=yes&sm=0), so it would be good to support those as well imo

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree we should support suffixes

}

/**
 * Lock-file validation hook. A Dockerfile has no accompanying lock file, so there is
 * nothing to inspect and the check succeeds unconditionally.
 * @returns {boolean} always true
 */
function validateLockFile() {
	return true
}

/**
 * Parse the last FROM instruction of a Dockerfile and return the image it builds on.
 * In multi-stage builds the last FROM represents the final stage. When that FROM names
 * an earlier build stage (`FROM <image> AS <name>` ... `FROM <name>`), the alias is
 * resolved back to the image the named stage was created from.
 * Leading `--flag` tokens (e.g. `--platform=linux/amd64`) and a trailing `AS <name>`
 * are not part of the returned reference. The FROM keyword is matched case-insensitively.
 * @param {string} manifestContent the content of the Dockerfile
 * @returns {string} the image reference from the last FROM line
 * @throws {Error} when no FROM line with an image reference is found in the Dockerfile
 * @throws {Error} when the image reference contains a variable (`$NAME` or `${NAME}`)
 */
export function parseFromImage(manifestContent) {
	// NOTE(review): reviewers of this change asked for a real Dockerfile parser instead of
	// this line scan (tree-sitter is reportedly already used elsewhere in the repo; a grammar
	// exists at https://github.com/wharflab/tree-sitter-containerfile). This scan looks at
	// one physical line at a time, so a FROM split with a line continuation is not joined.
	const stageImages = new Map() // lower-cased stage alias -> image that stage is based on
	let lastFrom = null
	for (const line of manifestContent.split(/\r?\n/)) {
		const trimmed = line.trim()
		if (!/^FROM\s+/i.test(trimmed)) {
			continue
		}
		// Remaining tokens: [--flag=val ...] image [AS name]
		const tokens = trimmed.replace(/^FROM\s+/i, '').split(/\s+/)
		const imageIndex = tokens.findIndex(token => !token.startsWith('--'))
		if (imageIndex === -1) {
			// Only flags, no image: treated as "no usable FROM", as before.
			lastFrom = null
			continue
		}
		const reference = tokens[imageIndex]
		// A reference that matches an earlier stage alias means "continue from that stage";
		// the image actually being built on is that stage's image.
		const stageKey = reference.toLowerCase()
		const image = stageImages.has(stageKey) ? stageImages.get(stageKey) : reference
		const asKeyword = tokens[imageIndex + 1]
		const alias = tokens[imageIndex + 2]
		if (alias && /^AS$/i.test(asKeyword)) {
			stageImages.set(alias.toLowerCase(), image)
		}
		lastFrom = image
	}
	if (!lastFrom) {
		throw new Error('No FROM line found in Dockerfile')
	}
	// '$' is not valid in an image reference, so any occurrence is a variable in either
	// the ${NAME} or the bare $NAME form; neither can be resolved here.
	if (lastFrom.includes('$')) {
		throw new Error('Dockerfile uses ARG substitution in FROM line — cannot resolve variable references')
	}
	return lastFrom
}

/**
 * Build the provided payload for a Dockerfile: read the file, take the image named by
 * its last FROM line, turn that into an image reference and hand it to generateImageSBOM.
 * The resulting SBOM is serialized to a JSON string.
 * @param {string} manifest path to the Dockerfile
 * @param {{}} [opts={}] optional various options to pass along the application
 * @returns {{ecosystem: string, content: string, contentType: string}}
 * @private
 */
function getImageSBOM(manifest, opts = {}) {
	const dockerfileText = fs.readFileSync(manifest, 'utf-8')
	const baseImage = parseFromImage(dockerfileText)
	const sbom = generateImageSBOM(parseImageRef(baseImage, opts), opts)
	const content = JSON.stringify(sbom)
	return { ecosystem, content, contentType: 'application/vnd.cyclonedx+json' }
}

/**
 * Component analysis entry point for a Dockerfile. Returns exactly what getImageSBOM
 * produces for the same arguments.
 * @param {string} manifest path to the Dockerfile
 * @param {{}} [opts={}] optional various options to pass along the application
 * @returns {Provided}
 */
function provideComponent(manifest, opts = {}) {
	const provided = getImageSBOM(manifest, opts)
	return provided
}

/**
 * Stack analysis entry point for a Dockerfile. Returns exactly what getImageSBOM
 * produces for the same arguments.
 * @param {string} manifest path to the Dockerfile
 * @param {{}} [opts={}] optional various options to pass along the application
 * @returns {Provided}
 */
function provideStack(manifest, opts = {}) {
	const provided = getImageSBOM(manifest, opts)
	return provided
}

/**
 * License lookup hook. Dockerfiles contain no license information, so no value is
 * ever produced.
 * @returns {null} always null
 */
function readLicenseFromManifest() {
	return null
}
108 changes: 108 additions & 0 deletions test/providers/oci_dockerfile.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import { expect } from 'chai'

import dockerfileProvider, { parseFromImage } from '../../src/providers/oci_dockerfile.js'

// Unit tests for the Dockerfile/Containerfile provider. Only the pure helpers are covered:
// nothing in this suite calls provideStack or provideComponent.
suite('testing the Dockerfile/Containerfile data provider', () => {

suite('isSupported', () => {
/** Verifies that isSupported returns true for Dockerfile and Containerfile, false for others. */
['Dockerfile', 'Containerfile'].forEach(name => {
test(`returns true for ${name}`, () => {
expect(dockerfileProvider.isSupported(name)).to.equal(true)
})
});

// The negative list pins exact, case-sensitive matching: lowercase spellings and a
// suffixed name ('Dockerfile.dev') are expected to be rejected.
['package.json', 'go.mod', 'Cargo.toml', 'dockerfile', 'containerfile', 'Dockerfile.dev'].forEach(name => {
test(`returns false for ${name}`, () => {
expect(dockerfileProvider.isSupported(name)).to.equal(false)
})
})
})

suite('validateLockFile', () => {
/** Verifies that validateLockFile always returns true since Dockerfiles have no lock file. */
test('always returns true', () => {
expect(dockerfileProvider.validateLockFile()).to.equal(true)
})
})

suite('readLicenseFromManifest', () => {
/** Verifies that readLicenseFromManifest returns null since Dockerfiles have no license info. */
test('returns null', () => {
expect(dockerfileProvider.readLicenseFromManifest()).to.equal(null)
})
})

suite('packageManagerName', () => {
/** Verifies that packageManagerName returns oci. */
test('returns oci', () => {
expect(dockerfileProvider.packageManagerName()).to.equal('oci')
})
})

// parseFromImage is imported as a named export and exercised directly on in-memory strings.
suite('parseFromImage', () => {
/** Verifies that a single FROM line extracts the correct image reference. */
test('extracts image from single-stage Dockerfile', () => {
const content = 'FROM node:18\nRUN npm install\n'
expect(parseFromImage(content)).to.equal('node:18')
})

/** Verifies that the last FROM line is used in multi-stage Dockerfiles. */
test('uses last FROM in multi-stage Dockerfile', () => {
const content = [
'FROM node:18 AS builder',
'RUN npm run build',
'',
'FROM nginx:alpine',
'COPY --from=builder /app/dist /usr/share/nginx/html',
].join('\n')
expect(parseFromImage(content)).to.equal('nginx:alpine')
})

/** Verifies that a single --platform flag is skipped when parsing FROM lines. */
test('handles --platform flag', () => {
const content = 'FROM --platform=linux/amd64 ubuntu:22.04\n'
expect(parseFromImage(content)).to.equal('ubuntu:22.04')
})

/** Verifies that multiple flags before the image reference are all skipped. */
test('handles multiple flags before image', () => {
const content = 'FROM --platform=linux/amd64 --some-flag=value ubuntu:22.04 AS base\n'
expect(parseFromImage(content)).to.equal('ubuntu:22.04')
})

/** Verifies that image references with digests are parsed correctly. */
test('handles image with digest', () => {
const content = 'FROM httpd@sha256:abc123\n'
expect(parseFromImage(content)).to.equal('httpd@sha256:abc123')
})

/** Verifies that ARG-substituted FROM targets are rejected with a clear error. */
// Only the braced ${NAME} form is exercised here.
test('throws when FROM target uses ARG substitution', () => {
const content = 'ARG BASE_IMAGE=ubuntu:22.04\nFROM ${BASE_IMAGE}\n'
expect(() => parseFromImage(content)).to.throw('Dockerfile uses ARG substitution in FROM line')
})

/** Verifies that an error is thrown when no FROM line is present. */
test('throws when no FROM line found', () => {
const content = 'RUN echo hello\n'
expect(() => parseFromImage(content)).to.throw('No FROM line found in Dockerfile')
})

/** Verifies that FROM line parsing is case-insensitive. */
test('handles case-insensitive FROM keyword', () => {
const content = 'from alpine:3.18\n'
expect(parseFromImage(content)).to.equal('alpine:3.18')
})

/** Verifies that comment lines and blank lines are ignored. */
test('ignores comments and blank lines', () => {
const content = [
'# This is a comment',
'',
'FROM registry.example.com/myapp:latest',
].join('\n')
expect(parseFromImage(content)).to.equal('registry.example.com/myapp:latest')
})
})
})
Loading