MongoDB with Mongoose — Patterns and Pitfalls

Schema Design Philosophy

MongoDB schema design is fundamentally different from relational databases. Instead of normalizing data, you design schemas around your query patterns.

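Embedding vs Referencing

Embed data that is read together with its parent and stays small and bounded (for example, a user's addresses). Reference data that can grow without bound or is queried on its own (for example, a user's orders). The schema below uses both approaches.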

Mongoose Schema Definition

const mongoose = require('mongoose');
const { Schema } = mongoose;

// Address schema (embedded subdocument).
// street, city, state and zip are all mandatory strings; zip must additionally
// match five digits, optionally followed by "-" and four more digits.
const US_ZIP_PATTERN = /^\d{5}(-\d{4})?$/;
const requiredString = () => ({ type: String, required: true });

const addressSchema = new Schema({
  street: requiredString(),
  city: requiredString(),
  state: requiredString(),
  zip: { ...requiredString(), match: US_ZIP_PATTERN },
  isPrimary: { type: Boolean, default: false },
});

// User schema.
// Addresses are embedded subdocuments; orders are stored as ObjectId
// references to documents of the 'Order' model.
const userSchema = new Schema({
  name: {
    type: String,
    required: [true, 'Name is required'],
    trim: true,
    minlength: 2,
    maxlength: 100,
  },
  email: {
    type: String,
    required: true,
    unique: true,      // Creates the unique index on email
    lowercase: true,   // Stored lower-cased
    match: [/^\S+@\S+\.\S+$/, 'Invalid email format'],
  },
  // select: false keeps the hash out of query results unless it is requested
  // explicitly, e.g. .select('+passwordHash').
  passwordHash: { type: String, required: true, select: false },
  role: { type: String, enum: ['user', 'admin', 'moderator'], default: 'user' },
  addresses: [addressSchema],  // Embedded
  orders: [{ type: Schema.Types.ObjectId, ref: 'Order' }],  // Referenced
  lastLoginAt: Date,
  // Incremented via $inc in the bulkWrite example. It has to be a declared
  // path, otherwise strict mode strips it from updates.
  loginCount: { type: Number, default: 0 },
  // Soft-delete marker used by the pre(/^find/) hook. Deliberately has no
  // default: the hook filters on { $exists: false }, so the field must be
  // absent (not null) while the user is active.
  deletedAt: Date,
}, {
  timestamps: true,  // Adds createdAt and updatedAt
  toJSON: { virtuals: true },
  toObject: { virtuals: true },
});

// Virtual field: derived from name and role on every read, never persisted.
userSchema.virtual('displayName').get(function displayName() {
  const { name, role } = this;
  return `${name} (${role})`;
});

// Indexes
// `email` needs no schema-level index here: `unique: true` on the field
// definition already creates the unique index, and declaring it a second time
// only produces a duplicate index definition.

// Compound index: serves "users with a given role, newest first" queries.
userSchema.index({ role: 1, createdAt: -1 });

// NOTE: in a real module, compile the model only after all hooks have been
// registered on the schema. Mongoose's middleware documentation warns that
// pre()/post() hooks added after mongoose.model() are generally not applied.
const User = mongoose.model('User', userSchema);

Middleware (Pre/Post Hooks)

const bcrypt = require('bcrypt');

// Pre-save hook — hash password before saving.
// `passwordHash` holds the plaintext password until this hook replaces it with
// the bcrypt hash (cost factor 12). The function is async, so Mongoose waits
// for the returned promise: no `next` callback is needed, and a rejected
// bcrypt.hash() aborts the save.
userSchema.pre('save', async function hashPassword() {
  // Do not re-hash an already hashed value when other fields are saved.
  if (!this.isModified('passwordHash')) return;
  this.passwordHash = await bcrypt.hash(this.passwordHash, 12);
});

// Pre-find hook — exclude deleted users by default.
// The regular expression registers the hook for every query middleware whose
// name starts with "find".
// NOTE(review): `deletedAt` should be a declared schema path so this filter is
// kept and cast under strictQuery — confirm against the schema definition.
userSchema.pre(/^find/, function excludeDeleted(next) {
  const notDeleted = { deletedAt: { $exists: false } };
  this.find(notDeleted);
  next();
});

// Post-save hook — log new user creation.
// By the time post('save') runs, `isNew` is already false, and Mongoose does
// not set a `wasNew` property on its own. The flag is therefore captured in a
// pre-save hook and carried over in `$locals`, the document's scratch space
// for passing data between middleware.
userSchema.pre('save', function rememberIsNew() {
  this.$locals.wasNew = this.isNew;
});

userSchema.post('save', function logNewUser(doc) {
  if (doc.$locals.wasNew) {
    console.log(`New user created: ${doc.email}`);
  }
});

// Instance method: check a plaintext candidate against the stored bcrypt hash.
// Resolves to true on a match and false otherwise.
// `passwordHash` is declared with select: false, so it is only present when the
// query asked for it explicitly; fail with a clear message instead of passing
// undefined to bcrypt.
userSchema.methods.comparePassword = async function(candidatePassword) {
  if (typeof this.passwordHash !== 'string') {
    throw new Error("passwordHash is not loaded; query with .select('+passwordHash')");
  }
  return bcrypt.compare(candidatePassword, this.passwordHash);
};

// Static method: look up a single user by email address.
// The address is lower-cased before querying; returns the findOne() query.
userSchema.statics.findByEmail = function findByEmail(email) {
  const normalizedEmail = email.toLowerCase();
  return this.findOne({ email: normalizedEmail });
};

Population (Joins)

// Order schema with references.
// `user` and `items.product` hold ObjectIds that populate() resolves against
// the 'User' and 'Product' models.
const orderSchema = new Schema({
  user: { type: Schema.Types.ObjectId, ref: 'User', required: true },
  items: [{
    product: { type: Schema.Types.ObjectId, ref: 'Product' },
    quantity: { type: Number, min: 1 },
    price: Number,
  }],
  total: Number,
  status: { type: String, enum: ['pending', 'shipped', 'delivered'], default: 'pending' },
}, { timestamps: true });

// Register the model: the queries below call `Order` directly, and
// `ref: 'Order'` on the user schema is resolved by this model name.
const Order = mongoose.model('Order', orderSchema);

// Populate user and product references, selecting only the listed fields
const order = await Order.findById(orderId).populate([
  { path: 'user', select: 'name email' },
  { path: 'items.product', select: 'name price image' },
]);

// Virtual populate (reverse reference without storing IDs):
// matches Order.user against this user's _id, newest first, at most five.
const recentOrdersVirtual = {
  ref: 'Order',
  localField: '_id',
  foreignField: 'user',
  options: { sort: { createdAt: -1 }, limit: 5 },
};
userSchema.virtual('recentOrders', recentOrdersVirtual);

const user = await User.findById(userId).populate({ path: 'recentOrders' });

Aggregation Pipeline

For complex data transformations, use the aggregation pipeline.

Example: Revenue by Category

// Revenue by category for the last 30 days.
// Result shape: [{ category, totalRevenue, orderCount, avgOrderValue }],
// sorted by totalRevenue descending.
const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
const lineTotal = { $multiply: ['$items.price', '$items.quantity'] };

const revenue = await Order.aggregate([
  // Stage 1: Filter recent delivered orders
  {
    $match: {
      status: 'delivered',
      createdAt: { $gte: new Date(Date.now() - THIRTY_DAYS_MS) },
    },
  },
  // Stage 2: Unwind items array (one doc per item)
  { $unwind: '$items' },
  // Stage 3: Lookup product details
  {
    $lookup: {
      from: 'products',
      localField: 'items.product',
      foreignField: '_id',
      as: 'productInfo',
    },
  },
  { $unwind: '$productInfo' },
  // Stage 4: Revenue per (category, order).
  // After $unwind every document is a single line item, so counting or
  // averaging here would measure line items rather than orders. Collapse the
  // line items of each order first.
  {
    $group: {
      _id: { category: '$productInfo.category', order: '$_id' },
      orderRevenue: { $sum: lineTotal },
    },
  },
  // Stage 5: Group by category — each input document is now one order
  {
    $group: {
      _id: '$_id.category',
      totalRevenue: { $sum: '$orderRevenue' },
      orderCount: { $sum: 1 },
      avgOrderValue: { $avg: '$orderRevenue' },
    },
  },
  // Stage 6: Sort by revenue
  { $sort: { totalRevenue: -1 } },
  // Stage 7: Reshape output
  {
    $project: {
      _id: 0,
      category: '$_id',
      totalRevenue: { $round: ['$totalRevenue', 2] },
      orderCount: 1,
      avgOrderValue: { $round: ['$avgOrderValue', 2] },
    },
  },
]);

Indexing Strategies

// Single field index
// NOTE: `email` only illustrates the syntax here — userSchema already gets a
// unique index on it from `unique: true` in the field definition.
userSchema.index({ email: 1 });

// Compound index (order matters!)
// Supports queries on: { status }, { status, createdAt }, but NOT { createdAt } alone
orderSchema.index({ status: 1, createdAt: -1 });

// Text index for full-text search
productSchema.index({ name: 'text', description: 'text' });
const results = await Product.find({ $text: { $search: 'wireless headphones' } });

// TTL index (auto-delete expired documents)
// With expireAfterSeconds: 0, each document expires at the time stored in its
// own `expiresAt` field.
sessionSchema.index({ expiresAt: 1 }, { expireAfterSeconds: 0 });

// Partial index (index only matching documents)
// Here only documents whose role is 'admin' are included in the index.
userSchema.index(
  { email: 1 },
  { partialFilterExpression: { role: 'admin' } }
);

Common Pitfalls and Fixes

1. Unbounded Arrays

// BAD: Array grows forever — can hit the 16MB document size limit.
// (The unbounded `messages` array is the deliberate anti-pattern shown here.)
const chatSchema = new Schema({
  participants: [Schema.Types.ObjectId],
  messages: [{           // Can grow to millions!
    sender: Schema.Types.ObjectId,
    text: String,
    sentAt: Date,
  }],
});

// GOOD: Separate collection for messages — one document per message, linked
// to its chat through `chatId`.
const messageSchema = new Schema({
  chatId: { type: Schema.Types.ObjectId, ref: 'Chat' },
  sender: { type: Schema.Types.ObjectId, ref: 'User' },
  text: String,
  sentAt: { type: Date, default: Date.now },
});
// Serves "messages of a chat, newest first". The chatId prefix of this
// compound index also covers lookups by chatId alone, so a separate
// single-field index on chatId would be redundant.
messageSchema.index({ chatId: 1, sentAt: -1 });

2. Missing lean() for Read-Only Queries

// Without lean(): Returns full Mongoose documents (change tracking, virtuals, methods)
// ~3x slower, ~3x more memory
const hydratedUsers = await User.find({ role: 'user' });

// With lean(): Returns plain JavaScript objects
// Much faster for read-only operations, but the results have none of the
// document features listed above, so they cannot be modified and saved.
const leanUsers = await User.find({ role: 'user' }).lean();

3. N+1 Query Problem

// BAD: One query per order for user data
const orders = await Order.find();
for (const order of orders) {
  order.userName = (await User.findById(order.user)).name; // N queries!
}

// GOOD: Use populate — the referenced users are fetched in one additional query
const populatedOrders = await Order.find().populate('user', 'name');

// BETTER for complex joins: Use aggregation $lookup
const orderSummaries = await Order.aggregate([
  { $lookup: { from: 'users', localField: 'user', foreignField: '_id', as: 'userInfo' } },
  // Keep orders whose user no longer exists; a plain $unwind would silently
  // drop them from the result. Such orders simply have no userName.
  { $unwind: { path: '$userInfo', preserveNullAndEmptyArrays: true } },
  { $project: { total: 1, status: 1, userName: '$userInfo.name' } },
]);

4. Not Handling Connection Errors

const mongoose = require('mongoose');

/**
 * Opens the Mongoose connection using the MONGODB_URI environment variable.
 * Logs on success; on failure logs the error and terminates the process with
 * exit code 1.
 */
async function connectDB() {
  const connectionOptions = {
    maxPoolSize: 10,
    serverSelectionTimeoutMS: 5000,
    socketTimeoutMS: 45000,
  };

  try {
    await mongoose.connect(process.env.MONGODB_URI, connectionOptions);
    console.log('MongoDB connected');
  } catch (err) {
    console.error('MongoDB connection error:', err);
    process.exit(1);
  }
}

// Connection lifecycle logging: warn when the connection drops, and report
// errors emitted on the connection.
function onDisconnected() {
  console.warn('MongoDB disconnected. Attempting reconnect...');
}

function onConnectionError(err) {
  console.error('MongoDB error:', err);
}

mongoose.connection.on('disconnected', onDisconnected);
mongoose.connection.on('error', onConnectionError);

// Graceful shutdown: close the MongoDB connection before exiting.
// Handles SIGINT (Ctrl+C) as well as SIGTERM. A failure while closing is
// logged and reported through a non-zero exit code instead of surfacing as an
// unhandled promise rejection.
async function shutdown() {
  try {
    await mongoose.connection.close();
    process.exit(0);
  } catch (err) {
    console.error('Error while closing MongoDB connection:', err);
    process.exit(1);
  }
}

// `once` so a repeated signal does not start a second close().
process.once('SIGTERM', shutdown);
process.once('SIGINT', shutdown);

Performance Tips

Use select() to return only needed fields — reduces network transfer and memory
Use lean() for read-only queries — skips hydration overhead
Create compound indexes matching your query patterns — check with explain()
Avoid $where and JavaScript execution in queries — can’t use indexes
Use bulkWrite() for batch operations — reduces round trips
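Set maxPoolSize based on your workload — current drivers default to 100 (the legacy poolSize option defaulted to 5), so size it to what your application and database server can actually sustain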

// Bulk operations: three independent writes sent in a single bulkWrite() call
const promoteToAdmin = {
  updateOne: { filter: { _id: id1 }, update: { $set: { role: 'admin' } } },
};
const countLogin = {
  updateOne: { filter: { _id: id2 }, update: { $inc: { loginCount: 1 } } },
};
const removeUser = {
  deleteOne: { filter: { _id: id3 } },
};

await User.bulkWrite([promoteToAdmin, countLogin, removeUser]);

MongoDB with Mongoose is powerful when you design schemas around access patterns. The key mistakes to avoid: unbounded arrays, missing indexes, and over-populating when aggregation would be more efficient.