Open-Source
ML testing library
Automatically run test suites with our Python library.
import giskard
from sklearn.pipeline import Pipeline
# Pipeline for the sklearn model
clf = Pipeline(...)
clf.fit(...)
# Wrap your Pandas DataFrame
dataset = giskard.Dataset(
    df=titanic_df, target="Survived"
)
# Wrap your model
model = giskard.Model(
    model=clf.predict_proba,
    model_type="classification"
)
# Scan for vulnerabilities
results = giskard.scan(model, dataset)
import giskard
from scipy.special import softmax
from transformers import AutoTokenizer
from transformers import TFAutoModelForSequenceClassification
tk = AutoTokenizer.from_pretrained(...)
hf = TFAutoModelForSequenceClassification.from_pretrained(...)
# Pipeline for the model prediction
def pred_func(df):
    return softmax(hf(**tk(...)).logits, axis=-1)
# Wrap your Pandas DataFrame and model
dataset = giskard.Dataset(df=text_df, ...)
model = giskard.Model(model=pred_func, ...)
# Scan for vulnerabilities
results = giskard.scan(model, dataset)
import giskard
from torch.utils.data import DataLoader
from torchtext.models import XLMR_BASE_ENCODER
torch_model = XLMR_BASE_ENCODER.get_model(head=h)
# Pipeline for the model prediction
def pred_func(df):
    loader = DataLoader(df.map(T))
    return [torch_model(i) for i in loader]
# Wrap your Pandas DataFrame and model
dataset = giskard.Dataset(df=text_df, ...)
model = giskard.Model(model=pred_func, ...)
# Scan for vulnerabilities
results = giskard.scan(model, dataset)
import giskard
model = ...  # TensorFlow model
# Define a custom wrapper
class MyTensorFlowModel(giskard.Model):
    def model_predict(self, df):
        return self.model.predict(
            pipeline.transform(df)
        )
# Wrap your Pandas DataFrame and model
dataset = giskard.Dataset(df, ...)
model = MyTensorFlowModel(model, ...)
# Scan for vulnerabilities
results = giskard.scan(model, dataset)
import giskard
from langchain import chains, prompts
llm = ...
prompt = prompts.PromptTemplate(
    input_variables=["product"],
    template="..."
)
chain = chains.LLMChain(
    llm=llm,
    prompt=prompt
)
# Wrap your Pandas DataFrame and model
dataset = giskard.Dataset(df, ...)
model = giskard.Model(chain, ...)
# Scan for vulnerabilities
results = giskard.scan(model, dataset)
import giskard
import requests
def pred_func(input_data):
    # Set up the API endpoint URL
    api = "https://api.example.com/predict"
    # Send GET request to API & get response
    response = requests.get(
        api, params={"input": input_data}
    )
    # Extract predictions from JSON response
    return ...
# Wrap your Pandas DataFrame and model
dataset = giskard.Dataset(df, ...)
model = giskard.Model(pred_func, ...)
# Scan for vulnerabilities
results = giskard.scan(model, dataset)

Product workflow
Deliver ML products better & faster. Become an ML superhero.
Get started
Scan
Automatically detect the vulnerabilities of ML models.
Test
Run custom tests to protect against risks of regression (see the sketch below).
CI/CD
Automatically publish reports in your CI/CD pipeline.
Monitor
Get alerted when something is wrong in production.
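A minimal sketch of this workflow with the Python library, assuming `model` and `dataset` are already wrapped as in the snippets above (the suite name is illustrative):
import giskard
# Scan: detect vulnerabilities automatically
results = giskard.scan(model, dataset)
# Test: turn the findings into a reusable test suite
suite = results.generate_test_suite("My test suite")
# CI/CD & Monitor: re-run the suite on every new model version
suite.run()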
Integrates with your favorite Machine Learning tools
Detect hidden vulnerabilities in your ML model
Hallucination and Misinformation
Safeguard against non-factual outputs, preserving accuracy
Harmful Content Generation
Ensure models steer clear of malicious or harmful responses
Prompt Injection
Guard against LLM manipulations that bypass filters or override model instructions
Information disclosure
Guarantee user privacy, ensuring LLMs don't divulge sensitive data
Robustness
Detect when model outputs are sensitive to small perturbations in the input data
Stereotypes & Discrimination
Avoid model outputs that perpetuate biases, stereotypes, or discriminatory content
Performance bias
Identify discrepancies in accuracy, precision, recall, or other evaluation metrics on specific data slices.
Unrobustness
Detect when your model is sensitive to small perturbations in the input data.
Overconfidence
Avoid incorrect predictions when your model is overly confident.
Stochasticity
Detect inherent randomness in your model and avoid variations in your results.
Data leakage
Detect inflated performance metrics caused by unintentional use of external data in your model.
Unethical behavior
Identify changes in your model's behavior when sensitive input attributes (gender, ethnicity...) are switched.
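To probe only a subset of these vulnerability families, a scan can be narrowed with the `only` argument; a sketch, assuming the detector tags mirror the category names above:
import giskard
# Restrict the scan to chosen detector families
results = giskard.scan(
    model, dataset,
    only=["robustness", "performance"]
)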
Streamline your ML testing process for tabular models
Detect vulnerabilities and run test suites, directly in your environment. Get your models production-ready in no time.
import giskard
demo_sklearn_model, df = giskard.demo.titanic()
dataset = giskard.Dataset(df, target="Survived")
model = giskard.Model(demo_sklearn_model,
                      model_type="classification")
giskard.scan(model, dataset)
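The scan report renders inline in a notebook; it can also be exported as a standalone HTML file (the file name here is illustrative):
results = giskard.scan(model, dataset)
# Save the interactive report to share or archive it
results.to_html("scan_report.html")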
Avoid generic benchmark evaluation datasets
Test your LLM application
Generate automated tests for precise & contextual assessments, from RAG to chatbots.
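For instance, a chatbot or RAG pipeline can be wrapped for scanning much like the models above; a minimal sketch, where `answer_fn`, `rag_chain`, and `questions_df` are hypothetical, and `name` and `description` give the LLM detectors the context they need:
import giskard
# `rag_chain` is a hypothetical RAG pipeline
def answer_fn(df):
    return [rag_chain.invoke(q) for q in df["question"]]
model = giskard.Model(
    model=answer_fn,
    model_type="text_generation",
    name="Product Q&A bot",
    description="Answers questions about the product docs",
    feature_names=["question"],
)
dataset = giskard.Dataset(df=questions_df)
results = giskard.scan(model, dataset)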
GET STARTED



Integrate your test suite into your CI/CD
Automatically generate a test suite based on detected vulnerabilities, and integrate it directly in your CI/CD pipeline.
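One way to wire this into a pipeline, sketched under the same assumptions as the snippets above: generate the suite from a scan, then let a failing assertion break the build.
import giskard
results = giskard.scan(model, dataset)
suite = results.generate_test_suite("CI regression suite")
# Fail the CI job if any generated test fails;
# a new candidate can be checked with suite.run(model=...)
assert suite.run().passed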
Try the Python library
Discover even more ML testing capabilities with the Giskard Hub
A collaborative Hub to unify AI Quality processes: share test results with your team, compare models, and debug to find the root causes of ML biases.
Get started
Join the community
Welcome to an inclusive community focused on ML Quality! Join us to share best practices, create new tests, and shape the future of AI safety standards together.
Discord
All those interested in ML Quality are welcome here!