-
Notifications
You must be signed in to change notification settings - Fork 11
feat(providers): add Dockerfile/Containerfile provider for image analysis #569
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| import fs from 'node:fs' | ||
|
|
||
| import { generateImageSBOM, parseImageRef } from '../oci_image/utils.js' | ||
|
|
||
| /* Default export: the provider entry points defined in this module, plus packageManagerName(), which returns 'oci'. */ export default { isSupported, validateLockFile, provideComponent, provideStack, readLicenseFromManifest, packageManagerName() { return 'oci' } } | ||
|
|
||
| /** @typedef {import('../provider').Provider} Provider */ | ||
|
|
||
| /** @typedef {import('../provider').Provided} Provided */ | ||
|
|
||
| /** | ||
| * Ecosystem identifier for OCI image packages; getImageSBOM returns it as the `ecosystem` field of its result. | ||
| * @type {string} | ||
| * @private | ||
| */ | ||
| const ecosystem = 'oci' | ||
|
|
||
| /** | ||
| * Check if the given manifest name is a Dockerfile or Containerfile. | ||
| * The comparison is an exact, case-sensitive match on the whole name, so variants such as | ||
| * `dockerfile` or `Dockerfile.dev` are reported as unsupported. | ||
| * @param {string} manifestName the manifest file name to check | ||
| * @returns {boolean} true only when manifestName is exactly 'Dockerfile' or 'Containerfile' | ||
| */ | ||
| function isSupported(manifestName) { | ||
| return manifestName === 'Dockerfile' || manifestName === 'Containerfile' | ||
| } | ||
|
|
||
| /** | ||
| * Dockerfiles have no lock file, so validation always passes. | ||
| * Takes no arguments and performs no checks. | ||
| * @returns {boolean} always true | ||
| */ | ||
| function validateLockFile() { return true; } | ||
|
|
||
| /** | ||
| * Parse the last FROM line from a Dockerfile to extract the base image reference. | ||
| * In multi-stage builds, the last FROM represents the final stage. | ||
| * | ||
| * The content is split on LF or CRLF and each line is trimmed. A line counts as a FROM | ||
| * instruction when it starts, case-insensitively, with `FROM` followed by whitespace. | ||
| * Leading tokens that begin with `--` are skipped, the next token is taken as the image, | ||
| * and anything after it (such as `AS name`) is ignored. A FROM line that carries only | ||
| * flags resets the result, so it is treated the same as having no FROM line. | ||
| * | ||
| * NOTE(review): only the `${` form is rejected; a bare `$VAR` reference would be returned | ||
| * as-is. The returned token is also not resolved against earlier `AS` stage names. | ||
| * Confirm whether both are intended. | ||
| * @param {string} manifestContent the content of the Dockerfile | ||
| * @returns {string} the image reference from the last FROM line, exactly as written | ||
| * @throws {Error} when no FROM line is found in the Dockerfile | ||
| * @throws {Error} when the image token of the last FROM line contains `${` | ||
| */ | ||
| export function parseFromImage(manifestContent) { | ||
| const lines = manifestContent.split(/\r?\n/) | ||
| let lastFrom = null | ||
| for (const line of lines) { | ||
| const trimmed = line.trim() | ||
| if (/^FROM\s+/i.test(trimmed)) { | ||
|
sourcery-ai[bot] marked this conversation as resolved.
|
||
| // Extract image ref: FROM [--flag=val ...] image [AS name] | ||
| const tokens = trimmed.replace(/^FROM\s+/i, '').split(/\s+/) | ||
| // Skip all leading --flag tokens (e.g. --platform=linux/amd64) | ||
| let i = 0 | ||
| while (i < tokens.length && tokens[i].startsWith('--')) { | ||
| i++ | ||
| } | ||
| lastFrom = tokens[i] || null | ||
| } | ||
|
sourcery-ai[bot] marked this conversation as resolved.
Comment on lines
+39
to
+53
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to use a proper parser, we already use tree-sitter in some places in this repo and theres a dockerfile/containerfile parser for it here: https://github.com/wharflab/tree-sitter-containerfile Our lack of using proper parsers in the java client is already a bit problematic, so if we can continue the trend of using parsers at least in the javascript client, thatd be great
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @a-oren make sure the use of parsers is preferred in the conventions file, specially if it is already used in other places like tree-sitter |
||
| } | ||
| if (!lastFrom) { | ||
| throw new Error('No FROM line found in Dockerfile') | ||
| } | ||
| if (lastFrom.includes('${')) { | ||
| throw new Error('Dockerfile uses ARG substitution in FROM line — cannot resolve variable references') | ||
| } | ||
| return lastFrom | ||
| } | ||
|
|
||
| /** | ||
| * Generate an image SBOM from a Dockerfile manifest using syft. | ||
| * Reads the manifest synchronously as UTF-8, takes the image named by its last FROM line | ||
| * (parseFromImage), turns it into an image reference (parseImageRef) and passes that to | ||
| * generateImageSBOM; the resulting SBOM is serialized with JSON.stringify. | ||
| * Errors thrown while reading or parsing the manifest are not caught here. | ||
| * @param {string} manifest path to the Dockerfile | ||
| * @param {{}} [opts={}] optional various options to pass along the application; forwarded unchanged to parseImageRef and generateImageSBOM | ||
| * @returns {{ecosystem: string, content: string, contentType: string}} the 'oci' ecosystem, the SBOM as a JSON string, and the content type 'application/vnd.cyclonedx+json' | ||
| * @private | ||
| */ | ||
| function getImageSBOM(manifest, opts = {}) { | ||
| const manifestContent = fs.readFileSync(manifest, 'utf-8') | ||
| const image = parseFromImage(manifestContent) | ||
| const imageRef = parseImageRef(image, opts) | ||
| const sbom = generateImageSBOM(imageRef, opts) | ||
| return { | ||
| ecosystem, | ||
| content: JSON.stringify(sbom), | ||
| contentType: 'application/vnd.cyclonedx+json' | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Provide content and content type for Dockerfile component analysis. | ||
| * Delegates directly to getImageSBOM and returns its result unchanged. | ||
| * @param {string} manifest path to the Dockerfile | ||
| * @param {{}} [opts={}] optional various options to pass along the application | ||
| * @returns {Provided} the ecosystem, serialized SBOM content and content type built by getImageSBOM | ||
| */ | ||
| function provideComponent(manifest, opts = {}) { | ||
| return getImageSBOM(manifest, opts) | ||
| } | ||
|
|
||
| /** | ||
| * Provide content and content type for Dockerfile stack analysis. | ||
| * Delegates directly to getImageSBOM and returns its result unchanged. | ||
| * @param {string} manifest path to the Dockerfile | ||
| * @param {{}} [opts={}] optional various options to pass along the application | ||
| * @returns {Provided} the ecosystem, serialized SBOM content and content type built by getImageSBOM | ||
| */ | ||
| function provideStack(manifest, opts = {}) { | ||
| return getImageSBOM(manifest, opts) | ||
| } | ||
|
|
||
| /** | ||
| * Dockerfiles contain no license information. | ||
| * Takes no arguments and does not read the manifest. | ||
| * @returns {null} always null | ||
| */ | ||
| function readLicenseFromManifest() { return null; } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| import { expect } from 'chai' | ||
|
|
||
| import dockerfileProvider, { parseFromImage } from '../../src/providers/oci_dockerfile.js' | ||
|
|
||
| /* Unit tests for the Dockerfile/Containerfile provider: the fixed-value provider helpers and parseFromImage. */ suite('testing the Dockerfile/Containerfile data provider', () => { | ||
|
|
||
| suite('isSupported', () => { | ||
| /** Verifies that isSupported returns true only for the exact names Dockerfile and Containerfile; other manifests, lower-case spellings and suffixed names must return false. */ | ||
| ['Dockerfile', 'Containerfile'].forEach(name => { | ||
| test(`returns true for ${name}`, () => { | ||
| expect(dockerfileProvider.isSupported(name)).to.equal(true) | ||
| }) | ||
| }); | ||
|
|
||
| ['package.json', 'go.mod', 'Cargo.toml', 'dockerfile', 'containerfile', 'Dockerfile.dev'].forEach(name => { | ||
| test(`returns false for ${name}`, () => { | ||
| expect(dockerfileProvider.isSupported(name)).to.equal(false) | ||
| }) | ||
| }) | ||
| }) | ||
|
|
||
| suite('validateLockFile', () => { | ||
| /** Verifies that validateLockFile always returns true since Dockerfiles have no lock file. */ | ||
| test('always returns true', () => { | ||
| expect(dockerfileProvider.validateLockFile()).to.equal(true) | ||
| }) | ||
| }) | ||
|
|
||
| suite('readLicenseFromManifest', () => { | ||
| /** Verifies that readLicenseFromManifest returns null since Dockerfiles have no license info. */ | ||
| test('returns null', () => { | ||
| expect(dockerfileProvider.readLicenseFromManifest()).to.equal(null) | ||
| }) | ||
| }) | ||
|
|
||
| suite('packageManagerName', () => { | ||
| /** Verifies that packageManagerName returns the string 'oci'. */ | ||
| test('returns oci', () => { | ||
| expect(dockerfileProvider.packageManagerName()).to.equal('oci') | ||
| }) | ||
| }) | ||
|
|
||
| suite('parseFromImage', () => { | ||
| /** Verifies that a single FROM line extracts the correct image reference. */ | ||
| test('extracts image from single-stage Dockerfile', () => { | ||
| const content = 'FROM node:18\nRUN npm install\n' | ||
| expect(parseFromImage(content)).to.equal('node:18') | ||
| }) | ||
|
|
||
| /** Verifies that the last FROM line is used in multi-stage Dockerfiles, and that the AS alias of an earlier stage is not returned. */ | ||
| test('uses last FROM in multi-stage Dockerfile', () => { | ||
| const content = [ | ||
| 'FROM node:18 AS builder', | ||
| 'RUN npm run build', | ||
| '', | ||
| 'FROM nginx:alpine', | ||
| 'COPY --from=builder /app/dist /usr/share/nginx/html', | ||
| ].join('\n') | ||
| expect(parseFromImage(content)).to.equal('nginx:alpine') | ||
| }) | ||
|
|
||
| /** Verifies that a single --platform flag is skipped when parsing FROM lines. */ | ||
| test('handles --platform flag', () => { | ||
| const content = 'FROM --platform=linux/amd64 ubuntu:22.04\n' | ||
| expect(parseFromImage(content)).to.equal('ubuntu:22.04') | ||
| }) | ||
|
|
||
| /** Verifies that multiple flags before the image reference are all skipped and that a trailing AS alias is ignored. */ | ||
| test('handles multiple flags before image', () => { | ||
| const content = 'FROM --platform=linux/amd64 --some-flag=value ubuntu:22.04 AS base\n' | ||
| expect(parseFromImage(content)).to.equal('ubuntu:22.04') | ||
| }) | ||
|
|
||
| /** Verifies that image references with digests are returned unchanged. */ | ||
| test('handles image with digest', () => { | ||
| const content = 'FROM httpd@sha256:abc123\n' | ||
| expect(parseFromImage(content)).to.equal('httpd@sha256:abc123') | ||
| }) | ||
|
|
||
| /** Verifies that FROM targets written with the braced variable form are rejected with a clear error. */ | ||
| test('throws when FROM target uses ARG substitution', () => { | ||
| const content = 'ARG BASE_IMAGE=ubuntu:22.04\nFROM ${BASE_IMAGE}\n' | ||
| expect(() => parseFromImage(content)).to.throw('Dockerfile uses ARG substitution in FROM line') | ||
| }) | ||
|
|
||
| /** Verifies that an error is thrown when no FROM line is present. */ | ||
| test('throws when no FROM line found', () => { | ||
| const content = 'RUN echo hello\n' | ||
| expect(() => parseFromImage(content)).to.throw('No FROM line found in Dockerfile') | ||
| }) | ||
|
|
||
| /** Verifies that the FROM keyword is matched case-insensitively. */ | ||
| test('handles case-insensitive FROM keyword', () => { | ||
| const content = 'from alpine:3.18\n' | ||
| expect(parseFromImage(content)).to.equal('alpine:3.18') | ||
| }) | ||
|
|
||
| /** Verifies that comment lines and blank lines before the FROM line do not affect the result. */ | ||
| test('ignores comments and blank lines', () => { | ||
| const content = [ | ||
| '# This is a comment', | ||
| '', | ||
| 'FROM registry.example.com/myapp:latest', | ||
| ].join('\n') | ||
| expect(parseFromImage(content)).to.equal('registry.example.com/myapp:latest') | ||
| }) | ||
| }) | ||
| }) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've seen a lot of cases where people will have multiple Dockerfiles but with different suffixes (see https://sourcegraph.com/search?q=context:global+f:/Dockerfile%5C..*&patternType=keyword&case=yes&sm=0), so it would be good to support those as well imo
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree we should support suffixes