D1 05 Chaining
LangChain Chaining Techniques
Introduction
This notebook demonstrates key chaining functionalities in LangChain:
- SimpleSequentialChain
- SequentialChain
- LLMRouterChain
- TransformChain
- LLMMathChain
Each chaining method is designed for different levels of complexity and control. Use simple chains for straightforward tasks, sequential chains for workflows, router chains for conditional branching, and transform chains when integrating custom logic.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_huggingface import ChatHuggingFace
from langchain_classic.chains import SimpleSequentialChain, SequentialChain, TransformChain, LLMChain, LLMMathChain
from langchain_classic.chains.router import LLMRouterChain, MultiPromptChain
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
[No output generated]
# === GPU & Model Status Check ===
import gc

print("=== Initial Resource Status ===")

# GPU Status - Use pynvml for SYSTEM-WIDE memory (not just this process)
try:
    import pynvml
    pynvml.nvmlInit()
    device_count = pynvml.nvmlDeviceGetCount()
    print(f"\nGPU Count: {device_count}")
    for i in range(device_count):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        name = pynvml.nvmlDeviceGetName(handle)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        total_gb = info.total / 1024**3
        used_gb = info.used / 1024**3
        free_gb = info.free / 1024**3
        usage_pct = (info.used / info.total) * 100
        print(f"\nGPU {i}: {name}")
        print(f" Total: {total_gb:.2f} GB")
        print(f" Used: {used_gb:.2f} GB ({usage_pct:.1f}%)")
        print(f" Free: {free_gb:.2f} GB")
        # Warning if low on memory (7B model needs ~5GB with 4-bit quantization)
        if free_gb < 6.0:
            print(f" ⚠️ WARNING: Low GPU memory! Model loading may fail.")
            print(f" Consider running cleanup cells in other notebooks first.")
    pynvml.nvmlShutdown()
except ImportError:
    print("\n⚠️ pynvml not installed - falling back to PyTorch (per-process only)")
    import torch
    if torch.cuda.is_available():
        print(f"GPU Available: {torch.cuda.get_device_name(0)}")
        for i in range(torch.cuda.device_count()):
            total = torch.cuda.get_device_properties(i).total_memory / 1024**3
            allocated = torch.cuda.memory_allocated(i) / 1024**3
            print(f" GPU {i}: {allocated:.2f} / {total:.2f} GB (THIS PROCESS ONLY)")
    else:
        print("No GPU available - using CPU")
except Exception as e:
    print(f"\nGPU status check failed: {e}")

# No Ollama in this notebook - HuggingFace only
print("\n" + "="*40)
=== Initial Resource Status ===

GPU Count: 1

GPU 0: b'NVIDIA GeForce RTX 4080 SUPER'
 Total: 15.99 GB
 Used: 2.80 GB (17.5%)
 Free: 13.19 GB
========================================
# Download model from HuggingFace (same base model as D1_01)
HF_LLM_MODEL = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"
[No output generated]
# 4-bit quantization config for efficient loading
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(HF_LLM_MODEL)
# Load model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    HF_LLM_MODEL,
    device_map="auto",
    quantization_config=quantization_config,
)
# Verify model config
print(model.config)
Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]
MistralConfig {
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "float16",
"eos_token_id": 32000,
"head_dim": null,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"quantization_config": {
"_load_in_4bit": true,
"_load_in_8bit": false,
"bnb_4bit_compute_dtype": "bfloat16",
"bnb_4bit_quant_storage": "uint8",
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_use_double_quant": false,
"llm_int8_enable_fp32_cpu_offload": false,
"llm_int8_has_fp16_weight": false,
"llm_int8_skip_modules": null,
"llm_int8_threshold": 6.0,
"load_in_4bit": true,
"load_in_8bit": false,
"quant_method": "bitsandbytes"
},
"rms_norm_eps": 1e-05,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"transformers_version": "4.57.2",
"use_cache": false,
"vocab_size": 32002
}
# Pipeline setup
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    return_full_text=False,
    eos_token_id=tokenizer.eos_token_id,
    skip_special_tokens=True,
)
llm = HuggingFacePipeline(pipeline=text_pipeline)
Device set to use cuda:0
chat_llm = ChatHuggingFace(llm=llm)
[No output generated]
SimpleSequentialChain
The SimpleSequentialChain is the most basic form of a chain. It takes a single input, passes it to a prompt, and the output of one step is directly passed as input to the next. It does not track intermediate steps or provide access to named outputs, making it suitable for linear, single-purpose chains.
Use case: quick linear pipelines like "generate → explain" or "summarize → expand".
template1 = "Give me a simple bullet point outline for a blog post on {topic}"
prompt1 = ChatPromptTemplate.from_template(template1)
chain1 = prompt1|chat_llm
template2 = "Write a blog post using this outline: {outline}"
prompt2 = ChatPromptTemplate.from_template(template2)
chain2 = prompt2|chat_llm
[No output generated]
full_chain = chain1|chain2
[No output generated]
result = full_chain.invoke("Artificial Intelligence") # Note: this invocation can take quite some time to execute
print(result.content)
Introduction to Artificial Intelligence (AI) Artificial Intelligence (AI) has been a subject of fascination and speculation for decades. It is a field of computer science that focuses on creating machines or software that can think, learn, and act like humans. AI has a rich history dating back to the 1950s, and it has come a long way since then. Definition and history of AI AI is defined as the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. The concept of AI traces its roots back to the 1940s and 1950s when mathematicians and computer scientists began exploring the idea of machines that could mimic human intelligence. Evolution of AI technologies Over the years, AI technology has evolved significantly. The first AI program, Logic Theorist, was developed in 1956, followed by the development of expert systems in the 1970s and 1980s. The advent of machine learning and deep learning in the 1990s and 2000s brought about a
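For comparison, the classic SimpleSequentialChain API imported above could wrap the same two steps. This is only a sketch (not executed in this notebook), assuming chat_llm, template1, and template2 as defined above; the LCEL pipe syntax used in this section is the equivalent, more current way to express it.
# Classic-API sketch: SimpleSequentialChain pipes each chain's single output
# straight into the next chain's single input.
outline_step = LLMChain(llm=chat_llm, prompt=ChatPromptTemplate.from_template(template1))
post_step = LLMChain(llm=chat_llm, prompt=ChatPromptTemplate.from_template(template2))
simple_chain = SimpleSequentialChain(chains=[outline_step, post_step], verbose=True)
# result = simple_chain.invoke("Artificial Intelligence")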
SequentialChain
SequentialChain is more flexible than SimpleSequentialChain. It supports multiple input and output variables and keeps track of intermediate outputs. Each step can depend on one or more outputs from earlier steps.
Use case: more complex workflows that need to reuse or transform earlier outputs in later steps.
template1 = "Give a summary of this employee's performance review:\n{review}"
prompt1 = ChatPromptTemplate.from_template(template1)
chain_1 = prompt1|chat_llm
[No output generated]
template2 = "Identify key employee weaknesses in this review summary:\n{review_summary}"
prompt2 = ChatPromptTemplate.from_template(template2)
chain_2 = prompt2|chat_llm
[No output generated]
template3 = "Create a personalized plan to help address and fix these weaknesses:\n{weaknesses}"
prompt3 = ChatPromptTemplate.from_template(template3)
chain_3 = prompt3|chat_llm
[No output generated]
# Note: The following PromptTemplate examples are for reference only.
# The actual chains (chain_1, chain_2, chain_3) above use ChatPromptTemplate.from_template().
# prompt1 = PromptTemplate(input_variables=["topic"], template="Generate a question about {topic}.")
# prompt2 = PromptTemplate(input_variables=["question"], template="Provide a short answer to: {question}")
[No output generated]
seq_chain = chain_1|chain_2|chain_3
[No output generated]
employee_review = '''
Employee Information:
Name: Simeon Harrison
Position: Machine Learning Engineer
Date of Review: 10 March, 2025
Strengths:
Simeon is a highly skilled machine learning engineer with a deep understanding of programming languages, algorithms, and data science. His technical expertise shines through in his ability to efficiently solve complex problems and deliver high-quality code.
One of Simeon's greatest strengths is his collaborative nature. He actively engages with cross-functional teams, contributing valuable insights and seeking input from others. His open-mindedness and willingness to learn from colleagues make him a true team player.
Simeon consistently demonstrates initiative and self-motivation. He takes the lead in seeking out new projects and challenges, and his proactive attitude has led to significant improvements in existing processes and systems. His dedication to self-improvement and growth is commendable.
Another notable strength is Simeon's teaching skills. He has shown great prowess in developing teaching materials and delivering high-end online courses. His adaptability allows him to seamlessly transition between different projects and tasks such as teaching, which makes him a valuable asset to the team.
Weaknesses:
While Simeon possesses numerous strengths, there are a few areas where he could benefit from improvement. One such area is time management. Occasionally, Simeon struggles with effectively managing his time, resulting in missed deadlines or the need for additional support to complete tasks on time, especially before delivering courses for the first time. Developing better prioritization and time management techniques would greatly enhance his efficiency.
Another area for improvement is Simeon's written communication skills. He does not answer customer requests promptly, as he finds it difficult to focus on several tasks simultaneously. There were also instances where his written documentation lacked clarity, leading to confusion among team members. Focusing on enhancing his written communication abilities will help him effectively convey ideas and instructions.
Additionally, Simeon tends to take on too many responsibilities and hesitates to delegate tasks to others. This can result in an excessive workload and potential burnout. Encouraging him to delegate tasks appropriately will not only alleviate his own workload but also foster a more balanced and productive team environment.
'''
[No output generated]
results = seq_chain.invoke(employee_review) # This too takes time to run
[No output generated]
print(results.content)
To address and fix Simeon Harrison's weakness in time management, a personalized plan can be created as follows: 1. Set clear goals and priorities: Help Simeon identify his most important tasks and set clear goals for each project. This will help him prioritize his work and focus on the most critical tasks first. 2. Use time-tracking tools: Simeon can use time-tracking tools such as Toggl or RescueTime to better understand how he spends his time. This will help him identify any time-wasting activities and adjust his schedule accordingly. 3. Break tasks into smaller chunks: Simeon can break down larger tasks into smaller, more manageable chunks. This will help him avoid feeling overwhelmed and make it easier to track progress. 4. Use a calendar or scheduling app: A calendar or scheduling app can help Simeon stay organized and ensure that he doesn't miss any deadlines. He can set reminders for important tasks and schedule blocks of time for specific tasks. 5. Delegate tasks when possible: If Simeon has too much on his plate, he should consider delegating tasks to other team members when appropriate
print(chain_1.invoke(employee_review).content)
In summary, Simeon Harrison is a highly skilled Machine Learning Engineer with strong technical expertise, collaborative skills, initiative, and teaching abilities. However, he struggles with time management, written communication, and delegation. Improving these areas will make him an even more valuable asset to his team.
print((chain_1|chain_2).invoke(employee_review).content)
The key employee weaknesses identified in the review summary are: 1. Time management: Simeon Harrison needs to improve his time management skills and develop better techniques for prioritization. 2. Written communication: His written communication abilities need enhancement to communicate more effectively. 3. Delegation of tasks: Simeon needs to work on delegating tasks appropriately, which will help him manage his workload better and foster teamwork.
print((chain_1|chain_2|chain_3).invoke(employee_review).content)
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
To address and fix these weaknesses, consider the following personalized plan: 1. Time Management: a. Prioritize tasks: Create a prioritized to-do list each day, focusing on the most critical tasks first. b. Use time tracking tools: Utilize apps or software to track how much time is spent on tasks and identify areas for improvement. c. Set realistic goals: Break down larger projects into smaller, manageable tasks and set achievable deadlines. d. Eliminate distractions: Identify and minimize distractions during work hours, such as social media or personal emails. e. Learn to say no: Avoid overcommitting by setting boundaries and learning to say no to non-essential tasks. 2. Written Communication: a. Seek feedback: Ask colleagues or supervisors for constructive feedback on written communication skills. b. Read widely: Expand vocabulary and improve grammar by reading extensively. c. Practice writing: Regularly write emails, reports, or memos to improve writing skills and receive feedback. d. Attend workshops or courses: Enroll in writing workshops or online courses to learn new techniques and improve writing
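For reference, the classic SequentialChain API imported above wires the same three steps together with named variables, which also keeps the intermediate outputs available. This is only a sketch (not executed here), assuming chat_llm and template1, template2, template3 as defined in this section.
# Classic-API sketch: SequentialChain with named input/output variables.
summary_step = LLMChain(llm=chat_llm, prompt=ChatPromptTemplate.from_template(template1), output_key="review_summary")
weakness_step = LLMChain(llm=chat_llm, prompt=ChatPromptTemplate.from_template(template2), output_key="weaknesses")
plan_step = LLMChain(llm=chat_llm, prompt=ChatPromptTemplate.from_template(template3), output_key="final_plan")
review_chain = SequentialChain(
    chains=[summary_step, weakness_step, plan_step],
    input_variables=["review"],
    output_variables=["review_summary", "weaknesses", "final_plan"],
)
# results = review_chain.invoke({"review": employee_review})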
LLMRouterChain
LLMRouterChain is used when you want to route a prompt to different chains or prompts depending on the input. It allows conditional execution paths, where an LLM can decide which destination (e.g., math, history, writing) to route a given input to based on predefined criteria or patterns.
Use case: topic routing, multi-skill assistants, task-specific logic dispatching.
beginner_template = '''You are an elementary school teacher who is really
focused on students in the age group of 6 to 10 and explains complex topics in easy-to-understand terms for that age group.
You assume no prior knowledge. Here is the question\n{input}'''
[No output generated]
expert_template = '''You are a world expert physics professor who explains physics topics
to advanced audience members. You can assume anyone you answer has a
PhD level understanding of Physics. Here is the question\n{input}'''
[No output generated]
prompt_infos = [
    {'name': 'advanced physics', 'description': 'Answers advanced physics questions',
     'prompt_template': expert_template},
    {'name': 'beginner physics', 'description': 'Answers basic beginner physics questions',
     'prompt_template': beginner_template},
]
[No output generated]
chain = MultiPromptChain.from_prompts(chat_llm, prompt_infos, verbose=True)
/opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/pydantic/main.py:250: LangChainDeprecationWarning: Please see the migration guide at: https://python.langchain.com/docs/versions/migrating_memory/ validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
print(chain.invoke("Why does a basket ball bounce?")['text'])
> Entering new MultiPromptChain chain...
beginner physics: {'input': 'Why does a basketball bounce?'} > Finished chain. Imagine you have a toy ball. When you throw it onto the ground, it gets squished a little bit, right? But then it springs back up into the air. That's because when you throw it, the air inside the ball gets compressed and then, when it lands, all that air wants to expand back to its normal size. This sudden expansion creates a force that pushes the ball into the air. And that's what makes it bounce!
print(chain.invoke("How do Feynman Diagrams work?")['text'])
> Entering new MultiPromptChain chain...
advanced physics: {'input': 'How do Feynman Diagrams work?'} > Finished chain. Feynman diagrams are a graphical representation of the mathematical equations used in quantum field theory. They are named after the physicist Richard Feynman, who developed them in the 1940s. These diagrams help visualize the elementary particle interactions that occur in the subatomic world. In essence, Feynman diagrams represent the probability amplitude of a given process by considering all possible interactions between particles. They are composed of a series of lines and vertices, where each line represents a particle and a vertex (or point) represents an interaction. The lines carry information about the particle's momentum, direction, and its corresponding wave function. The diagrams follow a set of rules, known as "Feynman rules," which are used to calculate the probability amplitude of a process. These rules involve assigning a specific mathematical expression to each type of vertex and propagator (the lines between vertices). The overall probability amplitude is then calculated by summing over all possible diagrams for a given process, often using techniques from quantum mechanics and complex analysis. Feynman diagrams are particularly useful in understanding processes involving high-energy particle interactions, such as those that occur in particle accelerators like the Large Hadron Collider
print(chain.invoke("How high can an astronaut jump on the moon?")['text'])
> Entering new MultiPromptChain chain...
beginner physics: {'input': "What factors affect the height of an astronaut's jump on the moon? Consider gravity, atmosphere, and any other relevant factors."} > Finished chain. Hey there, little explorer! Imagine you're on the moon, and you want to jump as high as you can. There are a few things that can affect how high you'll jump. 1. Gravity: On Earth, we weigh about 5-times more than on the moon. That's because Earth has more gravity, which pulls us down. On the moon, gravity is much weaker, so we're lighter and can jump higher. If we jumped on the moon, we'd jump higher than on Earth because the moon's gravity is only one-sixth as strong! 2. Atmosphere: The moon doesn't have much air, which we call an atmosphere. On Earth, our air helps us jump higher because it pushes against our feet when we push off the ground. But on the moon, without much air, your jump wouldn't be pushed up as much, and you'd jump lower than on Earth. 3. Muscle Strength: Our muscles help us jump! The stronger our muscles are, the higher we can jump. So, if an astronaut is really strong, they can jump higher on the moon than someone who isn't as
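MultiPromptChain handles the routing shown above. As an aside, the same conditional dispatch can be sketched with plain LCEL runnables; the sketch below is illustrative only (the one-word classifier prompt is an assumption, not part of this notebook) and assumes chat_llm, beginner_template, and expert_template as defined above.
# Routing sketch with LCEL: a small classification step picks which prompt to use.
# A RunnableLambda that returns a runnable will invoke that runnable on the same input.
from langchain_core.runnables import RunnableLambda

beginner_chain = ChatPromptTemplate.from_template(beginner_template) | chat_llm
expert_chain = ChatPromptTemplate.from_template(expert_template) | chat_llm
classifier = ChatPromptTemplate.from_template(
    "Reply with exactly one word, beginner or advanced: how difficult is this physics question?\n{input}"
) | chat_llm

def route(inputs: dict):
    label = classifier.invoke(inputs).content.lower()
    return expert_chain if "advanced" in label else beginner_chain

router_chain = RunnableLambda(route)
# print(router_chain.invoke({"input": "How do Feynman Diagrams work?"}).content)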
TransformChain
TransformChain allows you to insert arbitrary Python logic into a LangChain pipeline. It lets you define a transformation function that takes in inputs and returns a modified dictionary of outputs. This is useful for pre- or post-processing data before or after it passes through a model or another chain.
Use case: text normalization, formatting, filtering, or enrichment between model steps.
# Define a simple transformation function
def uppercase_fn(inputs: dict) -> dict:
    return {"output": inputs["text"].upper()}
transform_chain = TransformChain(input_variables=["text"], output_variables=["output"], transform=uppercase_fn)
[No output generated]
# Run it
output = transform_chain.invoke({"text": "this should be uppercase"})
print("TransformChain output:", output)
TransformChain output: {'text': 'this should be uppercase', 'output': 'THIS SHOULD BE UPPERCASE'}
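The same pre-processing step can also live directly inside an LCEL pipe via RunnableLambda. A minimal sketch, assuming chat_llm is still loaded (the "Repeat this exactly" prompt is just an illustrative placeholder):
# The uppercase transform as a RunnableLambda feeding a prompt and the chat model.
from langchain_core.runnables import RunnableLambda

uppercase_step = RunnableLambda(lambda x: {"text": x["text"].upper()})
shout_chain = uppercase_step | ChatPromptTemplate.from_template("Repeat this exactly: {text}") | chat_llm
# print(shout_chain.invoke({"text": "this should be uppercase"}).content)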
LLMMathChain
LangChain's LLMMathChain is a specialized chain used to evaluate or solve math-related prompts, especially those involving multi-step reasoning or intermediate calculations. It is part of LangChain's approach to tool-augmented reasoning, where LLMs use helper tools (like a calculator) to improve accuracy.
It does so by:
- Having the LLM generate a math expression or plan
- Using the numexpr library as a calculator tool to actually compute the result
- Returning the final result in a structured way
It's especially useful for:
- Word problems
- Problems involving arithmetic, algebra, or logic
- Cases where hallucination of numbers is problematic
# pip install numexpr
[No output generated]
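Under the hood, LLMMathChain prompts the model to emit a numeric expression and then evaluates that expression with numexpr instead of trusting the model's arithmetic (the traceback later in this section shows the numexpr.evaluate call). A minimal illustration of that evaluation step:
# What the calculator step does with a generated expression (illustration only).
import math
import numexpr

expression = "60 / 1.5"  # e.g. distance divided by time
print(numexpr.evaluate(expression.strip(), global_dict={}, local_dict={"pi": math.pi, "e": math.e}))  # 40.0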
# Initialize the math chain
math_chain = LLMMathChain.from_llm(llm=llm)
# Run a word problem
result = math_chain.invoke("If a train travels 60 km in 1.5 hours, what is its average speed?")
print(result)
{'question': 'If a train travels 60 km in 1.5 hours, what is its average speed?', 'answer': 'Answer: 0.011111111111111112'}
# Run a word problem that breaks it
result = math_chain.invoke("A football is kicked from the ground and reaches its maximum height of 5m in 10m horizontal distance from where it was kicked. How far from the kicking point will it land, assuming there is no air resistance and it flies in a perfectly parabolic arc?")
print(result)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[32], line 2
1 # Run a word problem that breaks it
----> 2 result = math_chain.invoke("A football is kicked from the ground and reaches its maximum height of 5m in 10m horizontal distance from where it was kicked. How far from the kicking point will it land, assuming there is no air resistance and it flies in a perfectly parabolic arc?")
3 print(result)
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/base.py:167, in Chain.invoke(self, input, config, **kwargs)
164 try:
165 self._validate_inputs(inputs)
166 outputs = (
--> 167 self._call(inputs, run_manager=run_manager)
168 if new_arg_supported
169 else self._call(inputs)
170 )
172 final_outputs: dict[str, Any] = self.prep_outputs(
173 inputs,
174 outputs,
175 return_only_outputs,
176 )
177 except BaseException as e:
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/llm_math/base.py:280, in LLMMathChain._call(self, inputs, run_manager)
274 _run_manager.on_text(inputs[self.input_key])
275 llm_output = self.llm_chain.predict(
276 question=inputs[self.input_key],
277 stop=["```output"],
278 callbacks=_run_manager.get_child(),
279 )
--> 280 return self._process_llm_result(llm_output, _run_manager)
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/llm_math/base.py:242, in LLMMathChain._process_llm_result(self, llm_output, run_manager)
240 else:
241 msg = f"unknown format from LLM: {llm_output}"
--> 242 raise ValueError(msg)
243 return {self.output_key: answer}
ValueError: unknown format from LLM: First we need to find the angle of the trajectory. The maximum height can be found by using the Pythagorean Theorem on the right triangle formed by the ground, the horizontal distance, and the height. The horizontal distance is the hypotenuse, and the height is one of the legs. The angle $\theta$ of the trajectory can be found by using the cosine function on the horizontal distance.
Let $h$ be the height from the ground, $d$ be the horizontal distance, and $x$ be the distance from the kicking point to the point where the football lands.
The expression for $h$ is $h = \sqrt{d^2 + x^2}$. Thus, using the Pythagorean Theorem, we can substitute for $h$ in the cosine function to find $\theta$.
```text
(10**2 + (x - 5)**2)**0.5 - x
```
...numexpr.evaluate("(10**2 + (x - 5)**2)**0.5 - x")...
```output
-5.8309518948453
# Running an algebra problem does not work
result = math_chain.invoke("Can you solve the following equation for x? x^2 + x - 2 = 0")
print(result)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/numexpr/necompiler.py:776, in getArguments(names, local_dict, global_dict, _frame_depth)
775 try:
--> 776 a = local_dict[name]
777 except KeyError:
KeyError: 'x'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/llm_math/base.py:206, in LLMMathChain._evaluate_expression(self, expression)
204 local_dict = {"pi": math.pi, "e": math.e}
205 output = str(
--> 206 numexpr.evaluate(
207 expression.strip(),
208 global_dict={}, # restrict access to globals
209 local_dict=local_dict, # add common mathematical functions
210 ),
211 )
212 except Exception as e:
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/numexpr/necompiler.py:991, in evaluate(ex, local_dict, global_dict, out, order, casting, sanitize, _frame_depth, **kwargs)
990 else:
--> 991 raise e
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/numexpr/necompiler.py:896, in validate(ex, local_dict, global_dict, out, order, casting, _frame_depth, sanitize, **kwargs)
895 names, ex_uses_vml = _names_cache.c[expr_key]
--> 896 arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth)
898 # Create a signature
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/numexpr/necompiler.py:778, in getArguments(names, local_dict, global_dict, _frame_depth)
777 except KeyError:
--> 778 a = global_dict[name]
779 arguments.append(numpy.asarray(a))
KeyError: 'x'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[33], line 2
1 # Running an algebra problem does not work
----> 2 result = math_chain.invoke("Can you solve the following equation for x? x^2 + x - 2 = 0")
3 print(result)
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/base.py:167, in Chain.invoke(self, input, config, **kwargs)
164 try:
165 self._validate_inputs(inputs)
166 outputs = (
--> 167 self._call(inputs, run_manager=run_manager)
168 if new_arg_supported
169 else self._call(inputs)
170 )
172 final_outputs: dict[str, Any] = self.prep_outputs(
173 inputs,
174 outputs,
175 return_only_outputs,
176 )
177 except BaseException as e:
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/llm_math/base.py:280, in LLMMathChain._call(self, inputs, run_manager)
274 _run_manager.on_text(inputs[self.input_key])
275 llm_output = self.llm_chain.predict(
276 question=inputs[self.input_key],
277 stop=["```output"],
278 callbacks=_run_manager.get_child(),
279 )
--> 280 return self._process_llm_result(llm_output, _run_manager)
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/llm_math/base.py:232, in LLMMathChain._process_llm_result(self, llm_output, run_manager)
230 if text_match:
231 expression = text_match.group(1)
--> 232 output = self._evaluate_expression(expression)
233 run_manager.on_text("\nAnswer: ", verbose=self.verbose)
234 run_manager.on_text(output, color="yellow", verbose=self.verbose)
File /opt/pixi/.pixi/envs/default/lib/python3.13/site-packages/langchain_classic/chains/llm_math/base.py:217, in LLMMathChain._evaluate_expression(self, expression)
212 except Exception as e:
213 msg = (
214 f'LLMMathChain._evaluate("{expression}") raised error: {e}.'
215 " Please try again with a valid numerical expression"
216 )
--> 217 raise ValueError(msg) from e
219 # Remove any leading and trailing brackets from the output
220 return re.sub(r"^\[|\]$", "", output)
ValueError: LLMMathChain._evaluate("
x**2 + x - 2 == 0
") raised error: 'x'. Please try again with a valid numerical expression Applying Chains¶
Task decomposition (or "dividing labor") is a key concept in prompt engineering and chain design.
It involves:
- Breaking down a complex task into smaller, manageable sub-tasks
- Solving them step by step, and optionally recombining the results
This leads to:
- Better accuracy
- Clearer LLM reasoning
- Easier chaining of logic
Take for example this task: “Write a summary of the main arguments in this article, and list 3 questions the reader should consider.”
We break it down into two steps:
- Summarize the text
- Generate reflective questions based on the summary
# Summarization Prompt
summarize_prompt = PromptTemplate.from_template(
"Summarize the main arguments of the following article:\n\n{article}"
)
[No output generated]
# Reflection Prompt
question_prompt = PromptTemplate.from_template(
"Based on the following summary, list 3 important questions the reader should consider:\n\n{summary}"
)
[No output generated]
# Compose chains using the pipe syntax
summarize_chain = summarize_prompt | llm
question_chain = question_prompt | llm
[No output generated]
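The two chains are invoked step by step below; they could also be composed into a single runnable that returns both the summary and the questions. A sketch (the dict/assign wiring is illustrative, not part of the original notebook), assuming summarize_chain and question_chain as defined above:
# Compose both decomposition steps into one runnable that keeps both outputs.
from langchain_core.runnables import RunnablePassthrough

decomposed_chain = (
    {"summary": summarize_chain}  # step 1: {"article": ...} -> {"summary": ...}
    | RunnablePassthrough.assign(questions=question_chain)  # step 2: adds "questions"
)
# out = decomposed_chain.invoke({"article": article_text})
# print(out["summary"], out["questions"])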
# Input text
article_text = (
"The demand for AI skills has exploded in recent years, and it’s likely to continue as more and more businesses embrace artificial intelligence solutions."
"The number of employers looking for AI-literate employees quadrupled between 2010 and 2019, and AI skills are becoming essential across a wide range of industries, making them a valuable asset for advancing your career and staying competitive in a rapidly evolving job market."
"AI-related jobs typically pay 11% more than non-AI roles within the same company."
"Skills with AI are particularly useful if you plan to work in the information, professional services, administrative, or finance sectors."
"AI adoption offers several potential benefits. It helps automate repetitive processes like data entry to improve operational efficiency."
"AI can also process and analyze large data sets rapidly, enabling it to identify patterns and make reasoned predictions to aid robust decision-making."
"For some businesses, using tools such as call bots and chatbots helps streamline customer interactions to boost engagement and satisfaction."
"Numerous businesses have used AI to improve customer experiences and drive growth. For example, J.P. Morgan and Chase developed the award-winning OmniAI platform to deliver accurate financial insights."
"The model can perform deep, comprehensive analyses of vast data sets, reducing operational costs and enabling faster solution development."
"AI has the potential to automate non-routine tasks and solve some of the world’s most complex problems."
"For example, AI technologies can model climate change predictions, improve energy grid efficiency, and even help you reduce your household energy consumption through smart home heating systems."
"Other applications include analyzing data during clinical trials and optimizing journeys to reduce the load on transport infrastructure."
"However, calculating the impact of AI on global challenges is complex, and even seemingly perfect solutions can have unintended outcomes."
"For instance, improving your home’s efficiency may encourage you to spend more time in your perfectly heated house, increasing your use of energy-hungry appliances."
"Accounting for unforeseen effects is just one potential pitfall of relying on AI."
"Poor data protection practices increase the risk of privacy violations, while training models on biased data could lead to discrimination."
"This is why ethical practices are essential for responsible AI development."
)
[No output generated]
# Step-by-step execution
summary = summarize_chain.invoke({"article": article_text})
questions = question_chain.invoke({"summary": summary})
[No output generated]
# Output
print("Summary:\n", summary)
print("\nReflective Questions:\n", questions)
Summary: To avoid unintended consequences, it’s essential to develop AI solutions with appropriate safeguards, including transparent processes and secure data management. The article argues that AI skills are increasingly in demand, and that the number of employers looking for AI-literate employees has quadrupled between 2010 and 2019. AI skills are becoming essential across a wide range of industries, making them a valuable asset for advancing your career and staying competitive in a rapidly evolving job market. AI-related jobs typically pay 11% more than non-AI roles within the same company. Skills with AI are particularly useful if you plan to work in the information, professional services, administrative, or finance sectors. AI adoption offers several potential benefits, such as automating repetitive processes, improving operational efficiency, and enabling faster solution development. However, calculating the impact of AI on global challenges is complex, and even seemingly perfect solutions can have unintended outcomes, such as encouraging more energy usage. To avoid unintended consequences, it’s essential to develop AI solutions with appropriate safeguards, including transparent processes and secure data management. Reflective Questions: Important questions to consider: 1. What are the potential unintended consequences of developing AI solutions? 2. How can we ensure transparent processes and secure data management when developing AI solutions? 3. How can AI-related skills advance your career and make you more competitive in the job market?
⚠️ Important: GPU Memory Management
Running cleanup cells alone does NOT fully release GPU memory!
The cleanup cell removes Python references and clears PyTorch cache, but the kernel process still holds GPU allocations until it terminates.
To fully release GPU memory:
- Run the cleanup cell below
- Then: Kernel → Shutdown (or restart) this notebook's kernel
Before running the next notebook:
- Check GPU status in the startup cell
- If GPU is still >80% used, shutdown unused notebook kernels
# === Resource Cleanup ===
import gc
import torch

print("=== Cleaning Up Resources ===\n")

# 1. Delete HuggingFace objects
print("1. Releasing HuggingFace resources...")
hf_objects = ['model', 'tokenizer', 'text_pipeline', 'llm', 'chat_llm', 'pipe']
deleted = []
for obj_name in hf_objects:
    if obj_name in globals():
        del globals()[obj_name]
        deleted.append(obj_name)
if deleted:
    print(f" ✓ Deleted: {', '.join(deleted)}")
else:
    print(" No HuggingFace objects to delete")

# 2. Clear GPU cache
print("\n2. Clearing GPU cache...")
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

# 3. Python garbage collection
gc.collect()
print("3. ✓ Garbage collection complete")

# 4. Show SYSTEM-WIDE GPU memory status (using pynvml)
print("\n4. Final GPU Memory Status (System-Wide):")
try:
    import pynvml
    pynvml.nvmlInit()
    for i in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        used_gb = info.used / 1024**3
        free_gb = info.free / 1024**3
        print(f" GPU {i}: {used_gb:.2f} GB used, {free_gb:.2f} GB free")
    pynvml.nvmlShutdown()
except Exception:
    # Fallback to per-process only
    for i in range(torch.cuda.device_count()):
        allocated = torch.cuda.memory_allocated(i) / 1024**3
        print(f" GPU {i}: {allocated:.2f} GB (this process only)")

print("\n" + "="*40)
print("✓ Cleanup complete!")

# === OPTIONAL: Shutdown Kernel to Fully Release GPU Memory ===
# Uncomment the next line to shutdown this kernel after cleanup
# (Required to fully release GPU memory for other notebooks)
# from IPython import get_ipython; get_ipython().kernel.do_shutdown(restart=False)
=== Cleaning Up Resources ===

1. Releasing HuggingFace resources...
 ✓ Deleted: model, tokenizer, text_pipeline, llm, chat_llm

2. Clearing GPU cache...
3. ✓ Garbage collection complete

4. Final GPU Memory Status (System-Wide):
 GPU 0: 9.05 GB used, 6.94 GB free
========================================
✓ Cleanup complete!