Skip to content

Memory leak in mlx_lm.server? #1124

@ivanfioravanti

Description

@ivanfioravanti

While running MMLU Pro tests against various models using mlx_lm.server, I consistently ran out of memory, and the system automatically rebooted as a result.

Here is a way to reproduce the issue.

Start mlx_lm.server like this:

 mlx_lm.server --cache-limit-gb 5 --use-default-chat-template

Ensure that you have the OpenAI Python package installed.
Create a test_memory_leak.py file with the following code and run it.
Memory will keep growing over time.
I used asitop to check memory growth during test execution.

import openai

def get_story_about_topic(client: openai.OpenAI, topic: str) -> str | None:
    """Request a story about ``topic`` through the chat completions API.

    Args:
        client: Client object exposing ``chat.completions.create``.
        topic: Subject inserted into the user prompt.

    Returns:
        The whitespace-stripped content of the first returned choice, or
        ``None`` when any exception is raised along the way (the error is
        printed rather than propagated).
    """
    try:
        # Sampling settings are fixed so every request has the same shape.
        request = {
            "model": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit",
            "messages": [
                {"role": "user", "content": f"Write a story about {topic}."},
            ],
            "temperature": 0,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
            "max_tokens": 4000,
        }
        completion = client.chat.completions.create(**request)
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()
    except Exception as err:
        print(f"Error getting story: {err}")
        return None

def main():
    """Request one story per predefined topic and print each result.

    Builds an OpenAI client aimed at ``http://localhost:8080/v1``
    (presumably the locally running mlx_lm.server instance), then calls
    ``get_story_about_topic`` once per topic in order. Topics that yield no
    story are skipped silently; any exception raised inside an iteration is
    printed and the loop moves on to the next topic.
    """
    client = openai.OpenAI(
        base_url="http://localhost:8080/v1",
        api_key="secret",
    )

    # Use predefined topics instead of generating them
    topics = [
        "space exploration", "ancient civilizations", "artificial intelligence", "deep-sea adventures",
        "medieval knights", "future technologies", "magical forests", "urban legends",
        "time travel", "mystery novels", "historical battles", "superhero origins",
        "parallel universes", "mythical creatures", "pirate adventures", "lost cities",
        "robot revolutions", "haunted houses", "underwater worlds", "desert survival",
        "volcanic islands", "wild west tales", "vampire legends", "post-apocalyptic survival",
        "alien encounters", "cyberpunk dystopias", "fantasy kingdoms", "epic journeys",
        "cosmic horrors", "fairy tale retellings", "espionage missions", "arctic explorations",
        "jungle expeditions", "space station mysteries", "ancient prophecies", "steampunk inventions",
        "dark academia", "samurai legends", "futuristic wars", "love stories in the stars",
        "modern witchcraft", "time-loop mysteries", "alchemy experiments", "underground civilizations",
        "floating cities", "alien wildlife", "forgotten gods", "treasure hunts",
        "prehistoric adventures", "galactic empires", "solar system tourism", "ancient libraries",
        "quantum realms", "genetic engineering", "mutant stories", "deserted islands",
        "supernatural detectives", "mountain legends", "urban fantasy", "living planets",
        "high seas drama", "magical academies", "ancient artifacts", "space colony conflicts",
        "haunted forests", "legendary swords", "wizard duels", "infinite mazes",
        "invisible cities", "celestial beings", "zombie apocalypses", "cursed treasures",
        "cyborg tales", "ancient ruins exploration", "lost expeditions", "parallel timelines",
        "psychological thrillers", "dreamworlds", "alien languages", "space-time anomalies",
        "forgotten histories", "magical creatures", "underworld journeys", "shape-shifters",
        "starship adventures", "haunted artifacts", "future utopias", "city of lights",
        "ancient curses", "divine intervention", "virtual realities", "supernatural realms",
        "heroic sagas", "epic betrayals", "mysterious caves", "intergalactic diplomacy",
        "enchanted islands", "dark rituals", "cosmic explorations", "rebellion tales",
        "wizards and witches", "time travel paradoxes", "parallel dimensions", "lost civilizations",
        "Mila Kunis", "Jennifer Aniston", "Leonardo DiCaprio", "Scarlett Johansson",
        "Tom Cruise", "Gal Gadot", "Ryan Reynolds", "Emma Stone",
        "Chris Hemsworth", "Natalie Portman", "Ryan Gosling", "Anne Hathaway",
        "Nicole Kidman", "Hugh Jackman", "Keira Knightley", "Russell Crowe",
        "New York City", "Los Angeles", "London", "Paris",
        "Tokyo", "Sydney", "Berlin", "Rome",
        "Cape Town", "Rio de Janeiro", "Cairo", "Mumbai",
        "Shanghai", "São Paulo", "Mexico City", "Buenos Aires"
    ]
    print(f"Using {len(topics)} predefined topics")

    print("\nStarting story generation...")

    # Walk the topics in order; the progress line is 1-based while the
    # error line reports the raw 0-based index.
    for index, subject in enumerate(topics):
        try:
            print(f"\nIteration {index + 1} - {subject}")

            text = get_story_about_topic(client, subject)
            # An empty or missing story is skipped without further output.
            if text:
                print(f"Story: {text}")
        except Exception as err:
            print(f"Error in iteration {index}: {err}")

# Run the reproduction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions